# Load fire damage data from other states
pacman::p_load(data.table, sf, readxl)

source("code/globals.R")

rawdatafolder <- file.path(INPUT_PUBLIC, "DamageData/NotDINS/notCA")

cleandata <- data.table(matrix(data = NA, nrow = 0, ncol = 9))
cleandata <- data.table(
  State = character(), FIPS = character(), APN = character(), APNSeq = numeric(),
  Address = character(), FireName = character(), FireDate = character(),
  DamageLevel = character(), NotesField = character(), howmanystructures = numeric()
)
cleandata$FireDate <- as.POSIXct(cleandata$FireDate, format = "%Y-%m-%d")
cleandata$DamageLevel <- factor(cleandata$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# ARIZONA ----

## Yavapai County ----

#---- YARNELL FIRE 2013
yv <- read_excel(file.path(rawdatafolder, "Arizona_Yavapai/2013 Yarnell Fire Flags.xlsx")) %>%
  rename(APN = PARCELNO, DamageLevel = FLAGTYPEDESCRIPTION, Address = PROPADD) %>%
  mutate(
    NotesField = "",
    FireName = "Yarnell",
    FireDate = as.POSIXct("2013-06-28", format = "%Y-%m-%d"),
    State = "AZ",
    FIPS = "04025",
    APNSeq = NA,
    howmanystructures = NA
  )

# Recode the damage variable
stopifnot(anyNA(yv$DamageLevel) == FALSE)
table(yv$DamageLevel, useNA = "always")
yv$DamageLevel[yv$DamageLevel == "Imps Destroyed"] <- "Destroyed"
yv$DamageLevel[yv$DamageLevel == "Imps Partially Damaged"] <- "Minor"
table(yv$DamageLevel, useNA = "always")
yv$DamageLevel <- factor(yv$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# Handle duplicate APNs, using leading digitt of account number based on instructions from assessor in note above.
anyDuplicated(yv$APN)

#- address dupes
yv <- yv %>%
  mutate(mobilehome = substr(ACCOUNTNO, 1, 1) == "M") %>%
  arrange(APN, mobilehome) %>% # sort mobile homes to end of APN groups (TRUE sorts after FALSE)
  group_by(APN) %>%
  filter(row_number() == 1)

yv <- dplyr::select(yv, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

cleandata <- rbind(cleandata, yv)
rm(yv)

#-- Goodwin Fire 2017 -----#
gw <- read_excel(file.path(rawdatafolder, "Arizona_Yavapai/2017 Goodwin Fire Flags.xlsx")) %>%
  rename(APN = PARCELNO, DamageLevel = FLAGTYPEDESCRIPTION, Address = PROPADD) %>%
  mutate(
    NotesField = "",
    FireName = "Goodwin",
    FireDate = as.POSIXct("2017-06-24", format = "%Y-%m-%d"),
    State = "AZ",
    FIPS = "04025",
    APNSeq = NA,
    howmanystructures = NA
  )

# Recode the damage variable
stopifnot(anyNA(gw$DamageLevel) == FALSE)
table(gw$DamageLevel, useNA = "always")
gw$DamageLevel[gw$DamageLevel == "Imps Removed, Fire Destroyed"] <- "Destroyed"
gw$DamageLevel[gw$DamageLevel == "Imps Relisted, Fire Damaged"] <- "Minor"
table(gw$DamageLevel, useNA = "always")
gw$DamageLevel <- factor(gw$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# Handle duplicate APNs. None!
anyDuplicated(gw$APN)

gw <- dplyr::select(gw, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

cleandata <- rbind(cleandata, gw)
rm(gw)

# COLORADO -----

## El Paso County -----

ep <- read_excel(file.path(rawdatafolder, "Colorado_ElPaso/Wildfire CORA Request.xlsx")) %>%
  rename(PercentDamage = `Percent Damaged/ Destroyed`) %>%
  rename(APN = Parcel) %>%
  rename(FireName = Fire) %>%
  rename(FireDate = Date) %>%
  rename(APNSeq = Bldg) %>%
  rename(NotesField = Type) %>%
  mutate(State = "CO") %>%
  mutate(FIPS = "08041")

# Drop outbuildings, garages, commercial; keep mobile homes and residences
table(ep$NotesField, useNA = "always")
ep <- ep[ep$NotesField %in% c("Mobile Home", "Residence"), ]

ep$DamageLevel <- NA
table(ep$PercentDamage, useNA = "always")
ep$DamageLevel[ep$PercentDamage %in% c(0.25, 0.5)] <- "Minor"
ep$DamageLevel[ep$PercentDamage %in% c(0.75, 1)] <- "Destroyed"
ep$DamageLevel <- factor(ep$DamageLevel, levels = c("Minor", "Destroyed"), ordered = TRUE)
stopifnot(!is.na(ep$DamageLevel))
ep$PercentDamage <- NULL

## Address duplicate APNs
ep[duplicated(ep$APN) | duplicated(ep$APN, fromLast = TRUE), ]
ep <- ep %>%
  arrange(APN, NotesField, DamageLevel) %>%
  group_by(APN) %>%
  mutate(copy = seq_len(n())) %>%
  filter(copy == n()) %>% # Keep last within each dupe APN group, which should be most damage and most significant structure type (since Residence sorts after Mobile Home alphabetically)
  ungroup() %>%
  mutate(copy = NULL)

ep$howmanystructures <- NA

cleandata <- rbind(cleandata, ep)
rm(ep)

## Grand County -----

classes <- rep("character", 25)
classes[5] <- "integer" # zip code
classes[6] <- "numeric" # ot phone
classes[19:21] <- c("integer", "integer", "integer") # num structures, destroyedH, numotherdestroyed


etf <- read.csv(file.path(rawdatafolder, "Colorado_Grand/StateIDA_FINAL_20201102.csv"),
  stringsAsFactors = FALSE,
  header = TRUE,
  colClasses = classes
)

# Make sure parcel numbers imported with no loss
stopifnot(round(as.numeric(etf$parcelNUM), 0) == as.numeric(etf$parcelNUM))
etf$ParcelNUM <- as.numeric(etf$ParcelNUM)

# Two columns both report parcel numbers; make sure identical then drop one
stopifnot(etf$parcel_ == etf$ParcelNUM)
etf$parcel_ <- NULL

etf <- etf %>%
  rename(DamageLevel = level_of_d, APN = ParcelNUM) %>%
  mutate(Address = paste(street_add, city, sep = " ")) %>%
  mutate(Address = gsub(" NA ", " ", Address, fixed = TRUE)) %>%
  mutate(Address = gsub(" NA ", " ", Address, fixed = TRUE)) %>% # repeat because there are some " NA NA " strings, which screws up spacing... I'm sure this could be done smarter.
  mutate(NotesField = paste("numstruc", NumOfStruc, "DestroyedH", DestroyedH, "NumOtherDe", NumOtherDe, remarks)) %>%
  mutate(FireName = "EastTroublesome", FIPS = "08049", State = "CO") %>%
  mutate(FireDate = as.POSIXct("2020-10-21", format = "%Y-%m-%d")) %>%
  mutate(APNSeq = NA)

table(etf$DamageLevel, useNA = "always")
# update values for inaccessible homes based on "QAQC" field in the data
etf$DamageLevel[etf$APN == 119111100001] <- "affected"
etf$DamageLevel[etf$APN == 107724400012] <- "destroyed"
etf$DamageLevel[etf$APN == 119121200066] <- "destroyed"
etf$DamageLevel[etf$APN == 118924100012] <- "destroyed"

# drop 'affected', which appears to mean essentially zero structure damage based on Remarks field
etf <- etf[etf$DamageLevel != "affected", ]
etf$DamageLevel[etf$DamageLevel == "destroyed"] <- "Destroyed"
etf$DamageLevel[etf$DamageLevel == "minor"] <- "Minor"
etf$DamageLevel[etf$DamageLevel == "minor?"] <- "Minor"
table(etf$DamageLevel, useNA = "always")
etf$DamageLevel <- factor(etf$DamageLevel, levels = c("Minor", "Destroyed"), ordered = TRUE)

### NOTE: Based on the remarks field, it seems like parcels with multiple buildings have been listed as destroyed only in the "main home" is lost. This is helpful for thinking about parcels in ZTRAX data with multiple structures on them.


## Handle dupe APNs
#-pick which to keep
x <- etf[duplicated(etf$APN) | duplicated(etf$APN, fromLast = TRUE), ] %>%
  arrange(APN, DamageLevel) %>%
  filter(DamageLevel == "destroyed") %>%
  group_by(APN) %>%
  mutate(copy = row_number(), copies = n()) %>%
  filter(copy == copies)
#-make a list of those to drop (the complement of those keeping)
y <- etf[duplicated(etf$APN) | duplicated(etf$APN, fromLast = TRUE), ] %>%
  dplyr::select(globalid) %>%
  setdiff(x[, "globalid"])
#-strip the ones to drop from data
w <- which(etf$globalid %in% y$globalid)
etf <- etf[-w, ]

etf <- dplyr::select(etf, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel)
etf$howmanystructures <- NA

cleandata <- rbind(cleandata, etf)
rm(etf, classes, w, x, y)

## Larimer County ----

################### -- Cameron Peak Fire
cpf <- read_excel(file.path(rawdatafolder, "Colorado_Larimer/CameronPeakFire_CPF Area.xlsx")) %>%
  rename(APN = PARCELNO, APNSeq = IMPNO, FireDate = DATEOFDESTRUCTION) %>%
  mutate(NotesField = paste(ACCTTYPE, ACCOUNTNO, OCCCODEDESCRIPTION, sep = "_")) %>%
  mutate(Address = paste(STREETNO, PREDIRECTION, STREETNAME, STREETTYPE, POSTDIRECTION, UNITNAME, PROPERTYCITY, sep = " ")) %>%
  mutate(Address = gsub(" NA ", " ", Address, fixed = TRUE)) %>%
  mutate(Address = gsub(" NA ", " ", Address, fixed = TRUE)) %>% # repeat because there are some " NA NA " strings, which screws up spacing... I'm sure this could be done smarter.
  mutate(FireName = "CameronPeakFire", FIPS = "08069", State = "CO") %>%
  mutate(FireDate = as.POSIXct(FireDate, format = "%Y-%m-%d")) %>%
  mutate(FireDate = min(FireDate))

## Subset to home-like structures (including cabins)
table(cpf$OCCCODEDESCRIPTION, useNA = "always")
keeptypes <- c(
  "A Frame Res", "Cabin On Res", "Cabin On Ag", "Log On Ag", "Log On Res",
  "Manufactured Home On Real", "MH on Ag", "MH on Vacant Land", "Purged Manufactured Home",
  "Res Not Integral To Ag", "Single Family Residential", "Single Family Residential On Ag"
)
cpf <- cpf[cpf$OCCCODEDESCRIPTION %in% keeptypes, ]
table(cpf$OCCCODEDESCRIPTION, useNA = "always")
rm(keeptypes)

# Address duplicate APNs (none!)
cpf[duplicated(cpf$APN) | duplicated(cpf$APN, fromLast = TRUE), ]


cpf <- dplyr::select(cpf, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField)
cpf$DamageLevel <- "Destroyed"
cpf$DamageLevel <- factor(cpf$DamageLevel, levels = c("Minor", "Destroyed"), ordered = TRUE)
cpf$howmanystructures <- NA

# Add to cleaned dataset
cleandata <- rbind(cleandata, cpf)
rm(cpf)

################### -- Highpark Fire
#-note no damage and minor damage are treated same in my cleaning here
hp <- read_excel(file.path(rawdatafolder, "Colorado_Larimer/HPF - Damage ReportJuly6th.xlsx"))

## Damage is reported in multiple columns depending on record - consolidate:
hp$DamageLevel <- hp$`Latest ARC`
sum(is.na(hp$DamageLevel))
## -- Using Assessor Field Notes
destroyedstrings <- tolower(c(
  "all imps destroyed",
  "all improvements destroyed",
  "res destroyed",
  "destroyed. all gone.",
  "destroyed. gone",
  "house destroyed",
  "res and nav bld destroyed",
  "res and garage destroyed",
  "deg june 28 - cottage destroyed", # this one is just a cottage, no SFR on parcel
  "dps june 28. cabin destroyed", # just a cabin parcel, no SFR
  "Camp trailer destroyed. Dps 28." # no SFR on parcel
))
for (i in destroyedstrings) {
  hp$DamageLevel[is.na(hp$DamageLevel) &
    !is.na(hp$AppraiserFieldNotes) &
    grepl(i, tolower(hp$AppraiserFieldNotes), fixed = TRUE)] <-
    "Destroyed"
  print(sum(is.na(hp$DamageLevel)))
}
rm(i, destroyedstrings)

nodamagestrings <- tolower(c(
  "res okay",
  "res ok",
  "house ok",
  "no visible damage",
  "res is ok",
  "imps look ok",
  "kbm june 25 res and nav ok",
  "Minor - Some perforations in plastic OB roof. JRM June 21",
  "DEG June 28 Smaller OB destroyed"
))
for (j in nodamagestrings) {
  hp$DamageLevel[is.na(hp$DamageLevel) &
    !is.na(hp$AppraiserFieldNotes) &
    grepl(j, tolower(hp$AppraiserFieldNotes), fixed = TRUE)] <-
    "NVD to Res"
  print(sum(is.na(hp$DamageLevel)))
}
rm(j, nodamagestrings)

unknownstrings <- tolower(c(
  "multiple minor structures - unable to verify status",
  "Trailer/Camper on property burned - unable to located additional cabin",
  "Could not locate - find on aerial - JMP June 21",
  "DPS June 28. cottage here not on record. Several outbuildings destroyed.",
  "No Access"
))


for (k in unknownstrings) {
  hp$DamageLevel[is.na(hp$DamageLevel) &
    !is.na(hp$AppraiserFieldNotes) &
    grepl(k, tolower(hp$AppraiserFieldNotes), fixed = TRUE)] <-
    "Unknown"
  print(sum(is.na(hp$DamageLevel)))
}
rm(k, unknownstrings)

## -- Using "EOM list"
hp$DamageLevel[is.na(hp$DamageLevel) &
  !is.na(hp$`EOM List`) &
  grepl("N - Res Destroyed", hp$`EOM List`, fixed = TRUE)] <-
  "Destroyed"


hp$DamageLevel[is.na(hp$DamageLevel) &
  !is.na(hp$`EOM List`) &
  grepl("Y - Res standing", hp$`EOM List`, fixed = TRUE)] <-
  "NVD to Res"
print(sum(is.na(hp$DamageLevel)))

## Recategorize levels of damage variable
stopifnot(anyNA(hp$DamageLevel) == FALSE)
table(hp$DamageLevel, useNA = "always")
hp$DamageLevel[hp$DamageLevel == "Affected"] <- "Minor"
hp$DamageLevel[hp$DamageLevel == "Major"] <- "Minor"
hp$DamageLevel[hp$DamageLevel == "Inaccessible"] <- "Unknown"
hp$DamageLevel[hp$DamageLevel == "NVD to Res"] <- "NoDamage"
table(hp$DamageLevel, useNA = "always")
hp$DamageLevel <- factor(hp$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

## Other cleanup
hp <- hp %>%
  rename(APN = PARCELNB, Address = `SITE ADDRESS`) %>%
  mutate(NotesField = paste(`BLDG DESCRIPTION`, AppraiserFieldNotes, sep = "_")) %>%
  dplyr::filter(BLDGS > 0) %>% # drop lots with no structures
  dplyr::select(APN, Address, DamageLevel, NotesField, SFR, CabinCottage, Outbuilding, Mobile) %>%
  mutate(APNSeq = NA) %>%
  mutate(FireName = "HighparkFire", FIPS = "08069", State = "CO") %>%
  mutate(FireDate = as.POSIXct("2012-06-09", format = "%Y-%m-%d"))

# Address duplicate APNs -- solved by removing outbuildings/other where SFRs exist
hp <- hp %>%
  arrange(APN, SFR, CabinCottage, Outbuilding, Mobile, desc(DamageLevel)) %>% # Sort SFRs first, then cabins etc... within groups, highest damage first
  group_by(APN) %>%
  filter(row_number() == 1)

hp <- dplyr::select(hp, State, FIPS, APN, APNSeq, Address, FireName, FireDate, DamageLevel, NotesField)
hp$howmanystructures <- NA

# Add to cleaned dataset
cleandata <- rbind(cleandata, hp)
rm(hp)

################### -- Woodlands Height Fire
wh <- read_excel(file.path(rawdatafolder, "Colorado_Larimer/Woodlands Height Fire_WHF Area.xlsx")) %>%
  rename(APN = PARCELNO, APNSeq = IMPNO, FireDate = DATEOFDESTRUCTION, AddlInfo = ...15) %>%
  mutate(NotesField = paste(ATTRIBUTESUBTYPE, ACCOUNTNO, ACCTTYPE, OCCCODE, OCCCODEDESCRIPTION, AddlInfo, sep = "_")) %>%
  mutate(FireName = "WoodlandsHeightFire", FIPS = "08069", State = "CO") %>%
  mutate(FireDate = as.POSIXct(FireDate, format = "%Y-%m-%d")) %>%
  mutate(FireDate = min(FireDate)) %>%
  mutate(Address = paste(STREETNO, PREDIRECTION, STREETNAME, STREETTYPE, POSTDIRECTION, PROPERTYCITY, sep = " ")) %>%
  mutate(Address = gsub(" NA ", " ", Address, fixed = TRUE)) %>%
  mutate(Address = gsub(" NA ", " ", Address, fixed = TRUE)) # repeat because there are some " NA NA " strings, which screws up spacing... I'm sure this could be done smarter.

## Drop homes where only detached garage affected
wh <- filter(wh, AddlInfo != "*Only Destroyed Detached Garage" | is.na(AddlInfo))

## Subset to homes
table(wh$OCCCODEDESCRIPTION, useNA = "always")
wh <- wh[wh$OCCCODEDESCRIPTION == "Single Family Residential", ]

# Address duplicate APNs (none!)
wh[duplicated(wh$APN) | duplicated(wh$APN, fromLast = TRUE), ]

## Assume destroyed except 2 homes where notes indicate minor damage
wh$DamageLevel <- "Destroyed"
wh$DamageLevel[wh$AddlInfo == "*Minor Damage" & !is.na(wh$AddlInfo)] <- "Minor"

wh <- dplyr::select(wh, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel)
wh$howmanystructures <- NA

# Add to cleaned dataset
cleandata <- rbind(cleandata, wh)
rm(wh)

# OREGON -----

## Jackson County ----

jk <- read_excel(file.path(rawdatafolder, "Oregon_Jackson/almedadatarequest/StructureSamples.xlsx")) %>%
  rename(APN = AccountID, MAPLOT = parcel_id, DamageLevel = damage, Address = unit_suite, NotesField = narrative) %>%
  mutate(State = "OR", FIPS = "41029", APNSeq = NA, FireName = "Almeda-Obenchain") %>%
  mutate(FireDate = as.POSIXct("2020-09-09", format = "%Y-%m-%d")) %>%
  dplyr::filter(!(DAFlag %in% c("X: DUP", "X: Public", "X: RV"))) %>% # drop bad records (see Len's data notes)
  dplyr::filter(parcel_type == "Residential" | parcel_type == "residential") # keep residential lots only

## Recode the damage variable
stopifnot(anyNA(jk$DamageLevel) == FALSE)
table(jk$DamageLevel, useNA = "always")
jk$DamageLevel[jk$DamageLevel %in% c("affected", "major", "minor")] <- "Minor"
jk$DamageLevel[jk$DamageLevel == "destroyed"] <- "Destroyed"
jk$DamageLevel[jk$DamageLevel == "unaffected"] <- "NoDamage"
jk$DamageLevel[jk$DamageLevel == "inaccessible"] <- "Unknown"
table(jk$DamageLevel, useNA = "always")
jk$DamageLevel <- factor(jk$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

## -- keep only residential lots.
jk <- dplyr::filter(jk, parcel_type == "Residential" | parcel_type == "residential")

## Drop places without usable APNs
jk <- jk[!is.na(jk$APN), ]
jk <- jk[jk$APN != 0, ]


## -- Handle duplicate APNs
jk <- jk %>%
  arrange(APN, desc(dwelling_type), desc(DamageLevel)) %>%
  group_by(APN) %>%
  mutate(howmanystructures = n()) %>%
  filter(row_number() == 1)

table(jk$howmanystructures, useNA = "ifany")
table(jk$dwelling_type)
anyDuplicated(jk$APN)

## -- Select final columns and add to cleaned data
jk <- dplyr::select(jk, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, jk)
rm(jk)

## Lane County ------

la <- read_excel(file.path(rawdatafolder, "Oregon_Lane/Request - Holiday Farm Fire.xlsx"), sheet = 1) %>%
  rename(APN = `Account #`, DamageLevel = `Extent of Damage`) %>%
  mutate(State = "OR", FIPS = "41039", APNSeq = NA, FireName = "HolidayFarm", NotesField = "") %>%
  mutate(FireDate = as.POSIXct("2020-09-07", format = "%Y-%m-%d"))

## -- Handle duplicate APNs

la <- la %>%
  arrange(APN, desc(`Improvement Value`), desc(DamageLevel)) %>%
  group_by(APN) %>%
  mutate(howmanystructures = n(), copy = row_number())

## There are a small number (11), and almost all seem to be perfect dupes on all columns. Drop.
la <- dplyr::filter(la, copy == 1)

## 37 properties have NA damage. What's up with them?
## 33 of these are really poorly populated - no address, no value, no land use code. I drop those. I keep 4 with street addresses and property use codes.
na_damage <- la[is.na(la$DamageLevel), ]
na_damage <- na_damage$APN[is.na(na_damage$Address)]

la <- dplyr::filter(la, !(APN %in% na_damage))

## Recode the damage variable
table(la$DamageLevel, useNA = "always")
la$DamageLevel[la$DamageLevel %in% c(
  "Affected (1% - 9%)",
  "Major (51% - 75%)",
  "Minor (10% - 25%)",
  "Moderate (26% - 50%"
)] <- "Minor"
la$DamageLevel[is.na(la$DamageLevel)] <- "Unknown"
table(la$DamageLevel, useNA = "always")

## -- Select final columns and add to cleaned data
la <- dplyr::select(la, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, la)
rm(la)

## Lincoln County ----

lco <- read_excel(file.path(rawdatafolder, "Oregon_Lincoln/Fire Properties (1).xlsx"),
  skip = 2, sheet = "Property"
) %>%
  rename(APN = Property, Address = Situs.addr) %>%
  dplyr::filter(!is.na(APN)) %>% # strip total rows out
  dplyr::filter(`20 Imp Appr` > 0) %>% # drop lots with no improvements
  mutate(
    FIPS = 41041, State = "OR", FireName = "EchoMountainComplex",
    NotesField = paste("Pre-fire improvements Value", `20 Imp Appr`, sep = "_"),
    FireDate = as.POSIXct("2020-09-07", format = "%Y-%m-%d"), howmanystructures = NA,
    APNSeq = NA
  )

lco$DamageOverValue <- as.numeric(lco$`Imp Loss`) / lco$`20 Imp Appr`
stopifnot(anyNA(lco$DamageOverValue) == 0)
lco$DamageLevel <- NA
lco$DamageLevel[lco$DamageOverValue >= 0.5] <- "Destroyed"
lco$DamageLevel[lco$DamageOverValue < 0.5 & lco$`Imp Loss` > 0] <- "Minor"
lco$DamageLevel[lco$DamageOverValue < 0.5 & lco$`Imp Loss` == 0] <- "NoDamage"
table(lco$DamageLevel, useNA = "always")

# Handle duplicate APNs. None!
anyDuplicated(lco$APN)

lco <- dplyr::select(lco, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, lco)
rm(lco)

# Marion County -----

ma <- read_excel(file.path(rawdatafolder, "Oregon_Marion/(Ready to Send) MC_ASR Damage Assessment Feb18_1.xlsx")) %>%
  rename(TAXLOT = `Taxlot Number`, Address = `Site Address`, DamageLevel = `Damage Classification`) %>%
  mutate(FireName = "BeachieCreek-Santiam", FireDate = as.POSIXct("2020-08-16", format = "%Y-%m-%d"), FIPS = "41047", State = "OR", APNSeq = NA, howmanystructures = NA) %>%
  mutate(NotesField = `Property Type`) %>%
  dplyr::select(State, FIPS, TAXLOT, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Recode the damage variable
stopifnot(anyNA(ma$DamageLevel) == FALSE)
table(ma$DamageLevel, useNA = "always")
ma$DamageLevel[ma$DamageLevel %in% c("Affected")] <- "Minor"
ma$DamageLevel[ma$DamageLevel == "Destroyed"] <- "Destroyed"
ma$DamageLevel[ma$DamageLevel == "Intact"] <- "NoDamage"
ma$DamageLevel[ma$DamageLevel == "Inaccessible/Unknown"] <- "Unknown"
table(ma$DamageLevel, useNA = "always")
ma$DamageLevel <- factor(ma$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

## Use county parcels shapefile to convert from taxlot numbers to APNs consistent with ZTRAX
mataxlots <- st_read(file.path(rawdatafolder, "Oregon_Marion/taxlot/Parcels.shp"))
ma2 <- merge(ma, mataxlots[, c("TAXLOT", "ALT_TAXACC", "SITUS")], by = "TAXLOT")
setdiff(ma$TAXLOT, ma2$TAXLOT) ## very good merge -- only lose 4 properties. Addresses match closely across TAXLOT-matched records from both. data sets. And it is 1-1: TAXLOT uniquely identifies records in both datasets. Nice.
rm(ma)

ma2 <- dplyr::select(ma2, State, FIPS, ALT_TAXACC, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures) %>%
  rename(APN = ALT_TAXACC)

# Duplicated structures? None!
anyDuplicated(ma2$APN)

# Add to cleaned dataset
cleandata <- rbind(cleandata, ma2)
rm(ma2, mataxlots)


# WASHINGTON ------

## Okanagon County -----

destvaluecutoff <- 40000 # the arbitrary dollar value of SFR value loss for 'Destroyed'

## -- 2015 Okanogan Complex. Should I keep cabins here??
og <- read_excel(file.path(rawdatafolder, "Washington_Okanogan/2015 Okanogan complex structure loss.xlsx")) %>%
  rename(APN = `Parcel#`) %>%
  filter(APN != "Total" & !is.na(APN)) %>% # strip out totals row and two blank rows before it.
  mutate(Address = paste(`Street#`, `Street/Road`, sep = " ")) %>%
  dplyr::filter(`SFR#` > 0 & !is.na(`SFR#`)) %>% # Drops non single family homes. Dropped categories are cabins, garage shop, and outbuildings.
  mutate(
    FireName = "OkanoganComplex", FireDate = as.POSIXct("2015-08-15", format = "%Y-%m-%d"),
    FIPS = "53047", APNSeq = NA, NotesField = "", State = "WA", howmanystructures = NA
  )
# Code the damage variable


og$DamageLevel <- NA
og$DamageLevel[is.na(og$`SFR Value Loss`)] <- "NoDamage"
og$DamageLevel[!is.na(og$`SFR Value Loss`) & og$`SFR Value Loss` >= destvaluecutoff] <- "Destroyed"
og$DamageLevel[!is.na(og$`SFR Value Loss`) & og$`SFR Value Loss` < destvaluecutoff] <- "Minor"
table(og$DamageLevel, useNA = "always")
og$DamageLevel <- factor(og$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# check for dupe APNs. Just one record with a perfect duplicate on all rows. Drop the redundant obs.
#-view dupes
dupes <- og %>%
  filter(duplicated(APN) | duplicated(APN, fromLast = TRUE))

nrow(og)
og <- og[!duplicated(og$APN), ]
nrow(og)

og <- dplyr::select(og, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, og)
rm(og)

## ------------- Carlton Complex----------_#.
cc <- read_excel(file.path(rawdatafolder, "Washington_Okanogan/Carlton Complex Fire Totals Final.xlsx")) %>%
  rename(APN = `Parcel#`) %>%
  filter(!is.na(APN)) %>% # strip out totals row and 1 blank row before it.
  mutate(Address = paste(`Street#`, `Street/Road`, sep = " ")) %>%
  dplyr::filter(`SFR#` > 0 & !is.na(`SFR#`)) %>% # Drops non single family homes. Dropped categories are cabins, garage shop, and outbuildings.
  mutate(
    FireName = "CarltonComplex", FireDate = as.POSIXct("2014-07-14", format = "%Y-%m-%d"),
    FIPS = "53047", APNSeq = NA, NotesField = "", State = "WA", howmanystructures = NA
  )
# Code the damage variable
## -- I do this using an arbitrary cutoff of value loss for destroyed. Better way would be to get the improvements values from ZTRAX and do "destroyed" as those where
## the reported losses here are >50% of the Improvement value.
destvaluecutoff <- 40000
cc$DamageLevel <- NA
cc$DamageLevel[is.na(cc$`SFR Value Loss`)] <- "NoDamage"
cc$DamageLevel[!is.na(cc$`SFR Value Loss`) & cc$`SFR Value Loss` >= destvaluecutoff] <- "Destroyed"
cc$DamageLevel[!is.na(cc$`SFR Value Loss`) & cc$`SFR Value Loss` < destvaluecutoff] <- "Minor"
table(cc$DamageLevel, useNA = "always")
cc$DamageLevel <- factor(cc$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# check for dupe APNs. Two records are duplicated; one a perfect dupe and one not.
#-view dupes
dupes <- cc %>%
  filter(duplicated(APN) | duplicated(APN, fromLast = TRUE))

# Sort the higher value loss first and then drop dupe observations
cc <- arrange(cc, APN, desc(`SFR Value Loss`))
nrow(cc)
cc <- cc[!duplicated(cc$APN), ]
nrow(cc)

cc <- dplyr::select(cc, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, cc)
rm(cc)

## ------------- Cold Springs Fire ----------_#.
csf <- read_excel(file.path(rawdatafolder, "Washington_Okanogan/Cold Springs Fire loss for the public.xlsx")) %>%
  rename(APN = `Parcel#`) %>%
  filter(APN != "TOTALS" & !is.na(APN)) %>% # strip out totals row and 1 blank row before it.
  mutate(Address = paste(`Street#`, `Street/Road`, sep = " ")) %>%
  dplyr::filter(`SFR#` > 0 & !is.na(`SFR#`)) %>% # Drops non single family homes. Dropped categories are cabins, garage shop, and outbuildings.
  mutate(
    FireName = "ColdSprings", FireDate = as.POSIXct("2020-09-06", format = "%Y-%m-%d"),
    FIPS = "53047", APNSeq = NA, NotesField = "", State = "WA", howmanystructures = NA
  )

# Code the damage variable
## -- I do this using an arbitrary cutoff of value loss for destroyed. Better way would be to get the improvements values from ZTRAX and do "destroyed" as those where
## the reported losses here are >50% of the Improvement value.
destvaluecutoff <- 40000
csf$DamageLevel <- NA
csf$DamageLevel[is.na(csf$`SFR Value Loss`)] <- "NoDamage"
csf$DamageLevel[!is.na(csf$`SFR Value Loss`) & csf$`SFR Value Loss` >= destvaluecutoff] <- "Destroyed"
csf$DamageLevel[!is.na(csf$`SFR Value Loss`) & csf$`SFR Value Loss` < destvaluecutoff] <- "Minor"
table(csf$DamageLevel, useNA = "always")
csf$DamageLevel <- factor(csf$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# check for dupe APNs. None!
#-view dupes
dupes <- csf %>%
  filter(duplicated(APN) | duplicated(APN, fromLast = TRUE))

csf <- dplyr::select(csf, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, csf)
rm(csf)

## ------------- Eagle Road Fire ----------_#.
erf <- read_excel(file.path(rawdatafolder, "Washington_Okanogan/EAGLE RD FIRE AUG 1, 2014.xlsx")) %>%
  rename(APN = `Parcel#`) %>%
  filter(!is.na(APN)) %>% # strip out totals row
  mutate(Address = paste(`Street#`, `Street/Road`, sep = " ")) %>%
  dplyr::filter(`SFR#` > 0 & !is.na(`SFR#`)) %>% # Drops non single family homes. Dropped categories are cabins, garage shop, and outbuildings.
  mutate(
    FireName = "Eagle Road Fire", FireDate = as.POSIXct("2014-08-01", format = "%Y-%m-%d"),
    FIPS = "53047", APNSeq = NA, NotesField = "", State = "WA", howmanystructures = NA
  )

# Code the damage variable
## -- I do this using an arbitrary cutoff of value loss for destroyed. Better way would be to get the improvements values from ZTRAX and do "destroyed" as those where
## the reported losses here are >50% of the Improvement value.
destvaluecutoff <- 40000
erf$DamageLevel <- NA
erf$DamageLevel[is.na(erf$`SFR Value Loss`)] <- "NoDamage"
erf$DamageLevel[!is.na(erf$`SFR Value Loss`) & erf$`SFR Value Loss` >= destvaluecutoff] <- "Destroyed"
erf$DamageLevel[!is.na(erf$`SFR Value Loss`) & erf$`SFR Value Loss` < destvaluecutoff] <- "Minor"
table(erf$DamageLevel, useNA = "always")
erf$DamageLevel <- factor(erf$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# check for dupe APNs. None!
#-view dupes
dupes <- erf %>%
  filter(duplicated(APN) | duplicated(APN, fromLast = TRUE))

erf <- dplyr::select(erf, State, FIPS, APN, APNSeq, FireName, FireDate, Address, NotesField, DamageLevel, howmanystructures)

# Add to cleaned dataset
cleandata <- rbind(cleandata, erf)
rm(erf)

# Save collected table ------

# Save original address, APN, lon/lat (just NAs, none of these datasets had this
cleandata <- cleandata %>%
  mutate(
    APN_orig = APN,
    address_orig = Address,
    lon_orig = NA_real_,
    lat_orig = NA_real_,
    in_damage_data = 1
  )

## -- Flag bad/unknown damage records to drop after ZTRAX merge & neighbor identification (so get removed from comparison group too)
cleandata$badrecordflag <- FALSE
cleandata$badrecordflag[cleandata$DamageLevel == "Unknown"] <- TRUE
cleandata$badrecordflag[!is.na(cleandata$howmanystructures) & cleandata$howmanystructures > 1] <- TRUE

# Rename some things for consistency with DINS, so that I can process this with same functions that do DINS further down the pipe.
cleandata <- cleandata %>%
  dplyr::rename(UnformattedAssessorParcelNumber = APN) %>%
  dplyr::rename(INCIDENTNAME = FireName) %>%
  dplyr::rename(DAMAGE = DamageLevel) %>%
  dplyr::mutate(DAMAGE = as.character(DAMAGE)) %>%
  dplyr::rename(date = FireDate) %>%
  dplyr::mutate(date = as.Date(date)) %>%
  dplyr::mutate(FIPS = as.numeric(FIPS)) %>%
  dplyr::mutate(INCIDENTNUM = paste(INCIDENTNAME, FIPS, year(date), sep = "_")) %>%
  dplyr::mutate(
    STRUCTURETYPE = "NotReported", badstructuretype = FALSE,
    ROOFCONSTRUCTION = "NotReported", EAVES = "NotReported",
    EXTERIORSIDING = "NotReported", WINDOWPANE = "NotReported",
    VENTSCREEN = "NotReported", DECKPORCHONGRADE = "NotReported", DECKPORCHELEVATED = "NotReported",
    PATIOCOVERCARPORT = "NotReported", FENCEATTACHEDTOSTRUCTURE = "NotReported"
  )

summary(cleandata)

## View incident and county/state breakdowns
cleandata[, .N, by = INCIDENTNAME][order(-N)]
cleandata[, .N, by = FIPS][order(-N)]
cleandata[, .N, by = State][order(-N)]
cleandata[, .N, by = c("State", "date")][order(State, date)]

## Save the data, and a list of counties that need to be imported from ZTRAX
saveRDS(cleandata, file.path(WORKING, "/damage_other_states.RDS"))
saveRDS(unique(cleandata$FIPS), file.path(WORKING, "/countylist_other_states.RDS"))
